package com.atbeijing.bigdata.spark.mytest.core

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}


object Operator_groupBy {
  def main(args: Array[String]): Unit = {
    val conf: SparkConf = new SparkConf().setMaster("local[6]").setAppName("groupBy")
    val sc = new SparkContext(conf)

    // Extract the request paths for 17 May 2015 from the server log data in apache.log.
    val rdd: RDD[String] = sc.textFile("data/apache.log")

    // Each apache.log line is space-delimited; field 3 holds the datetime
    // (e.g. 17/05/2015:10:05:03) and field 6 holds the request path.
    val targetDay: RDD[String] = rdd.filter(line => {
      val fields: Array[String] = line.split(" ")
      fields(3).contains("17/05/2015")
    })

    // Keep only the request path from each matching line.
    val paths: RDD[String] = targetDay.map(line => line.split(" ")(6))
    paths.collect().foreach(println)
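
    // Illustrative sketch, not part of the original exercise: this object is named
    // Operator_groupBy, so for completeness here is one way groupBy could be applied
    // to the extracted paths. groupBy keys each path by itself, collecting duplicates
    // into an Iterable, and mapValues then counts how often each path was requested
    // that day. (The names `grouped` and `pathCounts` are our own.)
    val grouped: RDD[(String, Iterable[String])] = paths.groupBy(path => path)
    val pathCounts: RDD[(String, Int)] = grouped.mapValues(iter => iter.size)
    pathCounts.collect().foreach(println)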

    // Release the SparkContext's resources before the application exits.
    sc.stop()
  }
}
